* Session setup: start from an empty dataset, close any log left open by an
* earlier run (capture ignores the error when no log is open) and turn off
* output paging so the script runs without pausing.
clear
capture log close
set more off

* Memory settings requested by the original author.
* NOTE(review): presumably only relevant for older Stata releases - confirm.
set virtual on
set memory 500M

* All output from here on is written to the log, replacing any earlier copy.
log using sample_means_bootstrap, replace

*This program compares, between the public and private sectors, the variance across MSAs of the sample mean of each outcome.
*Bootstrap standard errors are computed.
*Three alternative bootstrap methods are considered.

* Set parameters
*   random_factor : multiplier applied to the replicate number to form the seed
*   bootreps      : number of bootstrap replicates per method
global random_factor 50
global bootreps 500

* Temporary files: the prepared base data plus one results file per
* bootstrap method.
tempfile basedat bootdat bootdat2 bootdat3


* Load the analytic file and rescale the dollar-valued outcomes to thousands.
use analytic_file, clear
foreach spendvar in tot_med inpat_exp outpat_exp rx_exp {
    replace `spendvar' = `spendvar'/1000
}

* Remove every existing msa_count* variable; msa_count is rebuilt later.
drop msa_count*

* Keep a copy that each bootstrap replicate reloads.
save "`basedat'", replace

*** BASELINE ESTIMATES ***

** SAMPLE MEANS ESTIMATES **
** No Covariates

* Outcomes analysed: four spending measures followed by four utilization measures.
local outcomes tot_med inpat_exp outpat_exp rx_exp no_hosp days_hosp outpat_vis rx_30de

* Within-cell standard deviation of every outcome (cell = msa by mcbs).
* It is constant within a cell, so it survives the collapse below unchanged.
foreach y of local outcomes {
    egen `y'_sd = sd(`y'), by(msa mcbs)
}

* Counter that sums to the number of observations in each cell.
generate msa_count = 1

* Step 1: one row per msa-by-mcbs cell holding the cell size, the outcome
* means and the within-cell standard deviations.
collapse (sum) msa_count (mean) `outcomes' *_sd, by(msa mcbs)

* Step 2: variance estimates by outcome.
foreach y of local outcomes {

    * Sampling variance of the cell mean.
    generate `y'_var = (`y'_sd^2)/msa_count

    * mcbs==0 is the private sample, mcbs==1 the public sample.
    foreach sector in priv pub {
        local flag = ("`sector'" == "pub")

        * Between variance of the region means.
        quietly summarize `y' if mcbs == `flag'
        scalar `y'_obs_sm_`sector' = r(sd)^2

        * Average within variance (the bias).
        quietly summarize `y'_var if mcbs == `flag'
        scalar `y'_samperr_`sector' = r(mean)

        * Corrected variance = observed minus sampling error.
        scalar `y'_corr_sm_`sector' = `y'_obs_sm_`sector' - `y'_samperr_`sector'
    }

    * Uncorrected and corrected variance differences (public minus private).
    scalar `y'_var_diff_obs  = `y'_obs_sm_pub  - `y'_obs_sm_priv
    scalar `y'_var_diff_corr = `y'_corr_sm_pub - `y'_corr_sm_priv

    * Uncorrected and corrected variance ratios (public over private).
    scalar `y'_var_rat_obs  = `y'_obs_sm_pub  / `y'_obs_sm_priv
    scalar `y'_var_rat_corr = `y'_corr_sm_pub / `y'_corr_sm_priv

    * Row vector with the ten statistics for this outcome.
    matrix A_`y' = (`y'_obs_sm_priv, `y'_obs_sm_pub, ///
        `y'_samperr_priv, `y'_samperr_pub, ///
        `y'_corr_sm_priv, `y'_corr_sm_pub, ///
        `y'_var_diff_obs, `y'_var_diff_corr, ///
        `y'_var_rat_obs, `y'_var_rat_corr)
}

* Stack the rows into a utilization table and a spending table.
matrix A_ut = (A_no_hosp \ A_days_hosp \ A_outpat_vis \ A_rx_30de)
matrix A_ex = (A_tot_med \ A_inpat_exp \ A_outpat_exp \ A_rx_exp)

* Both tables share the same column labels.
local statcols obs_priv obs_pub samperr_priv samperr_pub cor_priv cor_pub diff_obs diff_cor ratio_obs ratio_cor
foreach tbl in A_ut A_ex {
    matrix colnames `tbl' = `statcols'
}
matrix rownames A_ut = hospitalizations days outpat_visits scripts
matrix rownames A_ex = total inpatient outpatient drugs



******************************************************
***** BOOTSTRAP - RESAMPLE WITHIN PUBLIC/PRIVATE *****
******************************************************

* Method 1: observations are resampled with replacement separately within
* each sector (strata = mcbs). Every replicate repeats the baseline
* calculation and stores the ten statistics of each outcome as one row of the
* bootdat temporary file. The bootstrap standard error of a statistic is the
* standard deviation of that statistic across replicates.
*
* NOTE(review): the seed is reset at the start of every replicate (replicate
* number times the random_factor global). This is left unchanged so earlier
* results remain reproducible.

local outcomes tot_med inpat_exp outpat_exp rx_exp no_hosp days_hosp outpat_vis rx_30de

forvalues z=1(1)$bootreps {
    local g=`z'*$random_factor
    set seed `g'

    use "`basedat'", clear
    bsample, strata(mcbs)

    display "STARTING ROUND" `z'

    * Within-cell standard deviations, computed before the collapse
    foreach i of local outcomes {
        egen `i'_sd=sd(`i'), by(msa mcbs)
    }

    gen msa_count=1

    * Step 1: Collapse Data to Mean Values (one row per msa-by-mcbs cell)
    collapse (sum) msa_count (mean) `outcomes' *_sd , by(msa mcbs)

    * Step 2: Generate variance estimates by outcome
    foreach i of local outcomes {

        * Variance of the sample mean
        gen `i'_var=(`i'_sd^2)/msa_count

        * mcbs==0 is the private sample, mcbs==1 the public sample
        foreach s in priv pub {
            local m=("`s'"=="pub")

            * Between variance of region means
            qui sum `i' if mcbs==`m'
            scalar `i'_obs_sm_`s'=r(sd)^2

            * Average within variance (the bias)
            qui sum `i'_var if mcbs==`m'
            scalar `i'_samperr_`s'=r(mean)

            * Corrected variance
            scalar `i'_corr_sm_`s'=`i'_obs_sm_`s'-`i'_samperr_`s'
        }

        * Uncorrected and corrected variance differences
        scalar `i'_var_diff_obs=`i'_obs_sm_pub-`i'_obs_sm_priv
        scalar `i'_var_diff_corr=`i'_corr_sm_pub-`i'_corr_sm_priv

        * Uncorrected and corrected variance ratios
        scalar `i'_var_rat_obs=`i'_obs_sm_pub/`i'_obs_sm_priv
        scalar `i'_var_rat_corr=`i'_corr_sm_pub/`i'_corr_sm_priv

        local stats `i'_obs_sm_priv `i'_obs_sm_pub `i'_samperr_priv `i'_samperr_pub `i'_corr_sm_priv `i'_corr_sm_pub `i'_var_diff_obs `i'_var_diff_corr `i'_var_rat_obs `i'_var_rat_corr

        preserve

        * Copy the scalars into variables. scalar() forces the scalar to be
        * used even if a variable name could match, and double keeps the
        * full precision of the scalar.
        foreach ss of local stats {
            gen double `ss'=scalar(`ss')
        }

        * Collapse to bootstrap round variables (a single row)
        collapse (mean) `stats'

        * Append data. The append is skipped only when the results file does
        * not exist yet; any other append failure now stops the script
        * instead of letting the save below overwrite earlier replicates.
        gen bootround=`z'
        capture confirm file "`bootdat'"
        if _rc==0 {
            append using "`bootdat'"
        }
        save "`bootdat'", replace
        restore
    }
}

use "`bootdat'", clear


foreach i of local outcomes {
    sum `i'*

    * Compute the standard errors for the differences and ratios
    qui sum `i'_var_diff_obs
    scalar `i'_diff_obs_se=r(sd)

    qui sum `i'_var_diff_corr
    scalar `i'_diff_corr_se=r(sd)

    qui sum `i'_var_rat_obs
    scalar `i'_rat_obs_se=r(sd)

    qui sum `i'_var_rat_corr
    scalar `i'_rat_corr_se=r(sd)

    matrix B_`i'=[`i'_diff_obs_se, `i'_diff_corr_se, `i'_rat_obs_se, `i'_rat_corr_se]
}

matrix B_ut=[B_no_hosp \ B_days_hosp \ B_outpat_vis \ B_rx_30de]
matrix B_ex=[B_tot_med \ B_inpat_exp \ B_outpat_exp \ B_rx_exp]

matrix colnames B_ut= se_diff_obs se_diff_cor se_ratio_obs se_ratio_cor
matrix colnames B_ex= se_diff_obs se_diff_cor se_ratio_obs se_ratio_cor
matrix rownames B_ut=hospitalizations days outpat_visits scripts
matrix rownames B_ex=total inpatient outpatient drugs


************************************************
***** BOOTSTRAP - RESAMPLE WITHIN CLUSTERS *****
************************************************

* Method 2: observations are resampled with replacement separately within
* each msa-by-mcbs cell (strata = msa mcbs). Every replicate repeats the
* baseline calculation and stores the ten statistics of each outcome as one
* row of the bootdat2 temporary file. The bootstrap standard error of a
* statistic is the standard deviation of that statistic across replicates.
*
* NOTE(review): the seed is reset at the start of every replicate (replicate
* number times the random_factor global). This is left unchanged so earlier
* results remain reproducible.

local outcomes tot_med inpat_exp outpat_exp rx_exp no_hosp days_hosp outpat_vis rx_30de

forvalues z=1(1)$bootreps {
    local g=`z'*$random_factor
    set seed `g'

    use "`basedat'", clear
    bsample, strata(msa mcbs)

    display "STARTING ROUND" `z'

    * Within-cell standard deviations, computed before the collapse
    foreach i of local outcomes {
        egen `i'_sd=sd(`i'), by(msa mcbs)
    }

    gen msa_count=1

    * Step 1: Collapse Data to Mean Values (one row per msa-by-mcbs cell)
    collapse (sum) msa_count (mean) `outcomes' *_sd , by(msa mcbs)

    * Step 2: Generate variance estimates by outcome
    foreach i of local outcomes {

        * Variance of the sample mean
        gen `i'_var=(`i'_sd^2)/msa_count

        * mcbs==0 is the private sample, mcbs==1 the public sample
        foreach s in priv pub {
            local m=("`s'"=="pub")

            * Between variance of region means
            qui sum `i' if mcbs==`m'
            scalar `i'_obs_sm_`s'=r(sd)^2

            * Average within variance (the bias)
            qui sum `i'_var if mcbs==`m'
            scalar `i'_samperr_`s'=r(mean)

            * Corrected variance
            scalar `i'_corr_sm_`s'=`i'_obs_sm_`s'-`i'_samperr_`s'
        }

        * Uncorrected and corrected variance differences
        scalar `i'_var_diff_obs=`i'_obs_sm_pub-`i'_obs_sm_priv
        scalar `i'_var_diff_corr=`i'_corr_sm_pub-`i'_corr_sm_priv

        * Uncorrected and corrected variance ratios
        scalar `i'_var_rat_obs=`i'_obs_sm_pub/`i'_obs_sm_priv
        scalar `i'_var_rat_corr=`i'_corr_sm_pub/`i'_corr_sm_priv

        local stats `i'_obs_sm_priv `i'_obs_sm_pub `i'_samperr_priv `i'_samperr_pub `i'_corr_sm_priv `i'_corr_sm_pub `i'_var_diff_obs `i'_var_diff_corr `i'_var_rat_obs `i'_var_rat_corr

        preserve

        * Copy the scalars into variables. scalar() forces the scalar to be
        * used even if a variable name could match, and double keeps the
        * full precision of the scalar.
        foreach ss of local stats {
            gen double `ss'=scalar(`ss')
        }

        * Collapse to bootstrap round variables (a single row)
        collapse (mean) `stats'

        * Append data. The append is skipped only when the results file does
        * not exist yet; any other append failure now stops the script
        * instead of letting the save below overwrite earlier replicates.
        gen bootround=`z'
        capture confirm file "`bootdat2'"
        if _rc==0 {
            append using "`bootdat2'"
        }
        save "`bootdat2'", replace
        restore
    }
}

use "`bootdat2'", clear


foreach i of local outcomes {
    sum `i'*

    * Compute the standard errors for the differences and ratios
    qui sum `i'_var_diff_obs
    scalar `i'_diff_obs_se=r(sd)

    qui sum `i'_var_diff_corr
    scalar `i'_diff_corr_se=r(sd)

    qui sum `i'_var_rat_obs
    scalar `i'_rat_obs_se=r(sd)

    qui sum `i'_var_rat_corr
    scalar `i'_rat_corr_se=r(sd)

    matrix C_`i'=[`i'_diff_obs_se, `i'_diff_corr_se, `i'_rat_obs_se, `i'_rat_corr_se]
}

matrix C_ut=[C_no_hosp \ C_days_hosp \ C_outpat_vis \ C_rx_30de]
matrix C_ex=[C_tot_med \ C_inpat_exp \ C_outpat_exp \ C_rx_exp]

matrix colnames C_ut= se_diff_obs se_diff_cor se_ratio_obs se_ratio_cor
matrix colnames C_ex= se_diff_obs se_diff_cor se_ratio_obs se_ratio_cor
matrix rownames C_ut=hospitalizations days outpat_visits scripts
matrix rownames C_ex=total inpatient outpatient drugs



*************************************
***** BOOTSTRAP - BLOCK BY MSAs *****
*************************************

* Method 3: whole MSAs are resampled with replacement within each sector
* (strata = mcbs, cluster = msa). Every replicate repeats the baseline
* calculation and stores the ten statistics of each outcome as one row of the
* bootdat3 temporary file. The bootstrap standard error of a statistic is the
* standard deviation of that statistic across replicates.
*
* An MSA drawn more than once keeps its original msa value, so grouping the
* resampled data by msa would merge the copies into a single region with an
* inflated observation count. idcluster() creates a unique identifier for
* each resampled cluster, and all grouping below uses that identifier so
* every draw counts as its own region.
*
* NOTE(review): the seed is reset at the start of every replicate (replicate
* number times the random_factor global). This is left unchanged so earlier
* results remain reproducible.

local outcomes tot_med inpat_exp outpat_exp rx_exp no_hosp days_hosp outpat_vis rx_30de

* Name for the resampled-cluster identifier; a temporary name cannot clash
* with a variable already present in the base data.
tempvar bootmsa

forvalues z=1(1)$bootreps {
    local g=`z'*$random_factor
    set seed `g'

    use "`basedat'", clear
    bsample, strata(mcbs) cluster(msa) idcluster(`bootmsa')

    display "STARTING ROUND" `z'

    * Within-cell standard deviations, computed before the collapse
    foreach i of local outcomes {
        egen `i'_sd=sd(`i'), by(`bootmsa' mcbs)
    }

    gen msa_count=1

    * Step 1: Collapse Data to Mean Values (one row per resampled MSA and sector)
    collapse (sum) msa_count (mean) `outcomes' *_sd , by(`bootmsa' mcbs)

    * Step 2: Generate variance estimates by outcome
    foreach i of local outcomes {

        * Variance of the sample mean
        gen `i'_var=(`i'_sd^2)/msa_count

        * mcbs==0 is the private sample, mcbs==1 the public sample
        foreach s in priv pub {
            local m=("`s'"=="pub")

            * Between variance of region means
            qui sum `i' if mcbs==`m'
            scalar `i'_obs_sm_`s'=r(sd)^2

            * Average within variance (the bias)
            qui sum `i'_var if mcbs==`m'
            scalar `i'_samperr_`s'=r(mean)

            * Corrected variance
            scalar `i'_corr_sm_`s'=`i'_obs_sm_`s'-`i'_samperr_`s'
        }

        * Uncorrected and corrected variance differences
        scalar `i'_var_diff_obs=`i'_obs_sm_pub-`i'_obs_sm_priv
        scalar `i'_var_diff_corr=`i'_corr_sm_pub-`i'_corr_sm_priv

        * Uncorrected and corrected variance ratios
        scalar `i'_var_rat_obs=`i'_obs_sm_pub/`i'_obs_sm_priv
        scalar `i'_var_rat_corr=`i'_corr_sm_pub/`i'_corr_sm_priv

        local stats `i'_obs_sm_priv `i'_obs_sm_pub `i'_samperr_priv `i'_samperr_pub `i'_corr_sm_priv `i'_corr_sm_pub `i'_var_diff_obs `i'_var_diff_corr `i'_var_rat_obs `i'_var_rat_corr

        preserve

        * Copy the scalars into variables. scalar() forces the scalar to be
        * used even if a variable name could match, and double keeps the
        * full precision of the scalar.
        foreach ss of local stats {
            gen double `ss'=scalar(`ss')
        }

        * Collapse to bootstrap round variables (a single row)
        collapse (mean) `stats'

        * Append data. The append is skipped only when the results file does
        * not exist yet; any other append failure now stops the script
        * instead of letting the save below overwrite earlier replicates.
        gen bootround=`z'
        capture confirm file "`bootdat3'"
        if _rc==0 {
            append using "`bootdat3'"
        }
        save "`bootdat3'", replace
        restore
    }
}

use "`bootdat3'", clear

foreach i of local outcomes {
    sum `i'*

    * Compute the standard errors for the differences and ratios
    qui sum `i'_var_diff_obs
    scalar `i'_diff_obs_se=r(sd)

    qui sum `i'_var_diff_corr
    scalar `i'_diff_corr_se=r(sd)

    qui sum `i'_var_rat_obs
    scalar `i'_rat_obs_se=r(sd)

    qui sum `i'_var_rat_corr
    scalar `i'_rat_corr_se=r(sd)

    matrix D_`i'=[`i'_diff_obs_se, `i'_diff_corr_se, `i'_rat_obs_se, `i'_rat_corr_se]
}

matrix D_ut=[D_no_hosp \ D_days_hosp \ D_outpat_vis \ D_rx_30de]
matrix D_ex=[D_tot_med \ D_inpat_exp \ D_outpat_exp \ D_rx_exp]

matrix colnames D_ut= se_diff_obs se_diff_cor se_ratio_obs se_ratio_cor
matrix colnames D_ex= se_diff_obs se_diff_cor se_ratio_obs se_ratio_cor
matrix rownames D_ut=hospitalizations days outpat_visits scripts
matrix rownames D_ex=total inpatient outpatient drugs



***********************************
******* DISPLAY THE RESULTS *******
***********************************


* A_* holds the baseline point estimates; B_*, C_* and D_* hold the bootstrap
* standard errors from the three resampling methods, in the order in which
* they were computed.

** UTILIZATION **
matrix list A_ut, format(%9.5fc)
matrix list B_ut, format(%9.5fc)
matrix list C_ut, format(%9.5fc)
matrix list D_ut, format(%9.5fc)

** SPENDING **
* All four spending tables use the same three-decimal format.
matrix list A_ex, format(%9.3fc)
matrix list B_ex, format(%9.3fc)
matrix list C_ex, format(%9.3fc)
matrix list D_ex, format(%9.3fc)

* Close the log opened at the top of the script so the file is complete.
* NOTE(review): assumes nothing further is meant to be logged after this point.
log close


